knitr::opts_chunk$set(echo = TRUE)
library(readxl)
library(plyr)
library(tibble)
library(dplyr)
library(tidyverse)
library(stringr)
library(knitr)
library(forcats)
library(ggplot2)
library(hrbrthemes) #for ggplot2
library(bibliometrix)
library(igraph)
Data loading
# manually extracted data
xldata <- "./Data_extraction_postcrosschecking.xlsx"
# bibliometric data
bib <- convert2df("./bibliometric.bib", dbsource = "scopus", format = "bibtex")
Data organisation
Splitting list of tabs into separate dataframes
excel_sheets(path = xldata)
tab_names <- excel_sheets(path = xldata)
# creating a list of dataframes per tab
list_all <- lapply(tab_names, function(x) read_excel(path = xldata, sheet = x))
# assigning tab names to each dataframe
names(list_all) <- tab_names
# get dataframes out of list
list2env(list_all, .GlobalEnv)
Addressing objectives 1 and 2
Map the SR literature across disciplines e.g., the types of environmental exposures and traits synthesised, the proportion of SRs that examine inter- versus trans-generational effects, and which disciplines dominate the non-genetic inheritance SR literature. The map will highlight gaps in the literature that remain to be synthesised or have very few SRs.
Present discipline-specific research patterns by summarising commonalities and disparities between disciplines (e.g., do SRs of specific environmental exposures dominate one discipline and not others? Do some disciplines focus on inter-generational effects, and another on trans-generational effects?).
Percent of SR’s within disciplines
count_discipline <- Review_info %>%
count(discipline_code) %>%
arrange(desc(n))
percent_discipline <- count_discipline %>%
mutate(percent = (n/sum(n)) * 100)
percent_discipline$discipline_code <- factor(percent_discipline$discipline_code,
level = percent_discipline$discipline_code[order(percent_discipline$n, decreasing = FALSE)])
ggplot(percent_discipline, aes(x = discipline_code, y = percent)) + geom_col(aes(fill = ""),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
xlab("Discipline") + scale_fill_manual(values = c("#919191")) + theme(legend.position = "none",
axis.title.x = element_text(size = 10))

Inter- vs trans-generational effects within and between disciplines
Merged_inter_vs_trans <- merge(Inter_vs_trans_info, Review_info)
count_inter_vs_trans <- Merged_inter_vs_trans %>%
count(inter_vs_trans_code, by = discipline_code) %>%
arrange(desc(n))
percent_inter_vs_trans <- count_inter_vs_trans %>%
mutate(percent = (n/sum(n)) * 100)
percent_inter_vs_trans <- percent_inter_vs_trans %>%
rename(discipline_code = by)
ggplot(percent_inter_vs_trans, aes(x = inter_vs_trans_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Inter_vs_trans_info_unique <- Inter_vs_trans_code_info %>%
select(controlled_vocab_inter_vs_trans, description)
unique(Inter_vs_trans_info_unique)
## # A tibble: 3 x 2
## controlled_vocab_inter_vs_trans description
## <chr> <chr>
## 1 inter inter-generational
## 2 trans trans-generational
## 3 unclear unclear
Descendant generations within and between disciplines
merged_descendant_generat <- merge(Descendant_generat_info, Review_info)
count_descendant_generat <- merged_descendant_generat %>%
count(descendant_generat_code, by = discipline_code) %>%
arrange(desc(n))
percent_descendant_generat <- count_descendant_generat %>%
mutate(percent = (n/sum(n)) * 100)
percent_descendant_generat <- percent_descendant_generat %>%
rename(discipline_code = by)
ggplot(percent_descendant_generat, aes(x = descendant_generat_code, y = percent)) +
geom_col(aes(fill = discipline_code), width = 0.7) + theme_light() + coord_flip() +
scale_y_continuous(name = "Percent") + theme(legend.position = "bottom", axis.title.x = element_text(size = 10),
axis.title.y = element_blank()) + guides(fill = guide_legend(title = "Discipline:"))

Terminology used within and between disciplines
I.e., does the use of inter- and trans-generational inheritance match our definitions (see Fig. 1)
count_terminology <- Review_info %>%
count(terminology_code, by = discipline_code) %>%
arrange(desc(n))
percent_terminology <- count_terminology %>%
mutate(percent = (n/sum(n)) * 100)
percent_terminology <- percent_terminology %>%
rename(discipline_code = by)
ggplot(percent_terminology, aes(x = terminology_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
terminology_code_unique <- Terminology_code_info %>%
select(controlled_vocab_terminology, description_terminology)
unique(terminology_code_unique)
## # A tibble: 4 x 2
## controlled_vocab_terminology description_terminology
## <chr> <chr>
## 1 yes yes
## 2 no no
## 3 not used does not use these terms
## 4 unclear unclear
Types of non-genetic transmission within and between disciplines
I.e., are the non-genetic effects conferred through the matriline or patriline etc.
Merged_transmission_info <- merge(Transmission_info, Review_info)
count_transmission <- Merged_transmission_info %>%
count(transmission_code, by = discipline_code) %>%
arrange(desc(n))
percent_transmission <- count_transmission %>%
mutate(percent = (n/sum(n)) * 100)
percent_transmission <- percent_transmission %>%
rename(discipline_code = by)
ggplot(percent_transmission, aes(x = transmission_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Transmission_info_unique <- Transmission_code_info %>%
select(controlled_vocab_transmission, description)
unique(Transmission_info_unique)
## # A tibble: 4 x 2
## controlled_vocab_transmi… description
## <chr> <chr>
## 1 matriline matriline
## 2 patriline patriline
## 3 not separated not separated (i.e., the primary studies used do no…
## 4 unclear unclear/unspecefied
F0 environmental manipulations within and between disciplines
merged_F0_env <- merge(F0_env_info, Review_info)
count_F0_env <- merged_F0_env %>%
count(F0_env_code, by = discipline_code) %>%
arrange(desc(n))
percent_F0_env <- count_F0_env %>%
mutate(percent = (n/sum(n)) * 100)
percent_F0_env <- percent_F0_env %>%
rename(discipline_code = by)
ggplot(percent_F0_env, aes(x = F0_env_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
F0_env_info_unique <- F0_env_code_info %>%
select(controlled_vocab_F0_env, description)
unique(F0_env_info_unique)
## # A tibble: 9 x 2
## controlled_vocab_F0_e… description
## <chr> <chr>
## 1 diet diet
## 2 human induced human induced pollutant/toxin
## 3 environmental composi… natural variation in environmental composition (e.g., …
## 4 psychological stress psychological stress (e.g., post-natal separation)
## 5 temperature temperature
## 6 human health ‘human health’ related environments (e.g., tobacco, al…
## 7 population demographic differences in population demographics (e.g., populati…
## 8 light light and/or photoperiod
## 9 other other/unclear
Environmental effect direction within and between disciplines
I.e., are the effects of environment predicted to have negative, positive, or neutral effects on offspring phenotype
merged_env_eff <- merge(Env_eff_diection_info, Review_info)
count_env_eff <- merged_env_eff %>%
count(env_eff_direction_code, by = discipline_code) %>%
arrange(desc(n))
percent_env_eff <- count_env_eff %>%
mutate(percent = (n/sum(n)) * 100)
percent_env_eff <- percent_env_eff %>%
rename(discipline_code = by)
ggplot(percent_env_eff, aes(x = env_eff_direction_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Env_eff_direction_info_unique <- Env_eff_direction_code_info %>%
select(controlled_vocab_env_eff_direction, description)
unique(Env_eff_direction_info_unique)
## # A tibble: 3 x 2
## controlled_vocab_env_eff_direction description
## <chr> <chr>
## 1 negative negative
## 2 positive positive
## 3 unclear unclear
F0 environmental exposure timing within and between disciplines
Note: We excluded SRs that solely focused on environmental exposures that occured when the F0 generation was a fetus (i.e., pre-natal). However, some broader SRs (i.e., eco evo SRs) included primary studies where the F0 generation was exposed pre-natally. This was therefore coded in our data.
merged_exposure_timing <- merge(Exposure_timing_info, Review_info)
count_exposure_timing <- merged_exposure_timing %>%
count(exposure_timing_code, by = discipline_code) %>%
arrange(desc(n))
percent_exposure_timing <- count_exposure_timing %>%
mutate(percent = (n/sum(n)) * 100)
percent_exposure_timing <- percent_exposure_timing %>%
rename(discipline_code = by)
ggplot(percent_exposure_timing, aes(x = exposure_timing_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used, and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Exposure_timing_info_unique <- Exposure_timing_code_info %>%
select(controlled_vocab_exposure_timing, description)
unique(Exposure_timing_info_unique)
## # A tibble: 5 x 2
## controlled_vocab_exposure_timing description
## <chr> <chr>
## 1 pre-natal pre-natally
## 2 post-natal post-natal before sexual maturity
## 3 post-sexual maturity after sexual maturity but before gestation
## 4 gestation during gestation
## 5 unclear other/unclear
Descendant traits within and between disciplines
merged_descendant_trait <- merge(Descendant_trait_info, Review_info)
count_descendant_trait <- merged_descendant_trait %>%
count(descendant_trait_code, by = discipline_code) %>%
arrange(desc(n))
percent_descendant_trait <- count_descendant_trait %>%
mutate(percent = (n/sum(n)) * 100)
percent_descendant_trait <- percent_descendant_trait %>%
rename(discipline_code = by)
ggplot(percent_descendant_trait, aes(x = descendant_trait_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Descendant_trait_info_unique <- Descendant_trait_code_info %>%
select(controlled_vocab_descendant_trait, description)
unique(Descendant_trait_info_unique)
## # A tibble: 9 x 2
## controlled_vocab_descendan… description
## <chr> <chr>
## 1 physiological physiological (e.g., immune function, insulin lev…
## 2 morphological morphological (e.g., body size, adiposity, colour…
## 3 reproductive reproductive (e.g., fecundity and sexual trait me…
## 4 life history life-history (e.g., developmental rate, aging and…
## 5 survival descendant survival/aging (must be post-natally)
## 6 behavioural behavioural (e.g., response to stimuli, anxiety, …
## 7 molecular molecular (e.g., gene expression, DNA methylation…
## 8 health health and disease (e.g., disease prevalence )
## 9 unclear other/unclear
Descendant sex within and between disciplines
merged_descendant_sex <- merge(Descendant_sex_info, Review_info)
count_descendant_sex <- merged_descendant_sex %>%
count(descendant_sex_code, by = discipline_code) %>%
arrange(desc(n))
percent_descendant_sex <- count_descendant_sex %>%
mutate(percent = (n/sum(n)) * 100)
percent_descendant_sex <- percent_descendant_sex %>%
rename(discipline_code = by)
ggplot(percent_descendant_sex, aes(x = descendant_sex_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Descendant_sex_info_unique <- Descendant_sex_code_info %>%
select(controlled_vocab_descendant_sex, description)
unique(Descendant_sex_info_unique)
## # A tibble: 3 x 2
## controlled_vocab_descendant_sex description
## <chr> <chr>
## 1 males males
## 2 females females
## 3 unclear unclear
Descendant age within and between disciplines
Note: We excluded SRs that solely focused on fetal traits in descendants. However, some broader SRs included primary studies that mearured fetal traits. This was therefore coeded in our data.
merged_descendant_age <- merge(Descendant_age_info, Review_info)
count_descendant_age <- merged_descendant_age %>%
count(descendant_age_code, by = discipline_code) %>%
arrange(desc(n))
percent_descendant_age <- count_descendant_age %>%
mutate(percent = (n/sum(n)) * 100)
percent_descendant_age <- percent_descendant_age %>%
rename(discipline_code = by)
ggplot(percent_descendant_age, aes(x = descendant_age_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Descendant_age_info_unique <- Descendant_age_code_info %>%
select(controlled_vocab_descendant_age, description)
unique(Descendant_age_info_unique)
## # A tibble: 5 x 2
## controlled_vocab_descendant_… description
## <chr> <chr>
## 1 fetal fetal/embryonic
## 2 juvenile juvenile (post-embryonic/birth prior to sexual …
## 3 adult adult
## 4 ongoing ongoing
## 5 unclear other/unclear
Higher taxononmic groups within and between disciplines
Merged_higher_taxon <- merge(Higher_taxon_info, Review_info)
count_higher_taxon <- Merged_higher_taxon %>%
count(taxon_code, by = discipline_code) %>%
arrange(desc(n))
percent_higher_taxon <- count_higher_taxon %>%
mutate(percent = (n/sum(n)) * 100)
percent_higher_taxon <- percent_higher_taxon %>%
rename(discipline_code = by)
ggplot(percent_higher_taxon, aes(x = taxon_code, y = percent)) + geom_col(aes(fill = discipline_code),
width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.
Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
Higher_taxon_info_unique <- Taxon_code_info %>%
select(controlled_vocab_taxon, description)
unique(Higher_taxon_info_unique)
## # A tibble: 5 x 2
## controlled_vocab_taxon description
## <chr> <chr>
## 1 vertebrates vertebrates
## 2 invertebrates invertebrates
## 3 plants plants
## 4 other other
## 5 unclear unclear
Descendent generation vs terminology use across disciplines
Does the terminology used (i.e., inter- vs trans-generational) match our definition (based on generation, sex and taxa) within the descendant generations examined
generat_terminology <- merge(Descendant_generat_info, Review_info)
count_generat_terminology <- generat_terminology %>%
count(descendant_generat_code, by = terminology_code) %>%
arrange(desc(n))
percent_generat_terminology <- count_generat_terminology %>%
mutate(percent = (n/sum(n)) * 100)
percent_generat_terminology <- percent_generat_terminology %>%
rename(terminology_code = by)
ggplot(percent_generat_terminology, aes(x = descendant_generat_code, y = percent)) +
geom_col(aes(fill = terminology_code), width = 0.7) + theme_light() + coord_flip() +
scale_y_continuous(name = "Percent") + theme(legend.position = "bottom", axis.title.x = element_text(size = 10),
axis.title.y = element_blank()) + guides(fill = guide_legend(title = "Terminology:"))

Time trend of the number of SRs per year
Publication_info %>%
count(year) %>%
ggplot(aes(x = year, y = n)) + geom_area(fill = "#919191", alpha = 1) + geom_line(color = "skyblue",
size = 1) + geom_point(size = 1, color = "blue") + theme_minimal() + scale_x_continuous(name = "",
limits = c(2005, 2020)) + scale_y_continuous(name = "Article count", limits = c(0,
10)) + ggtitle("Publication year") + theme(plot.title = element_text(hjust = 0.5))

Adressing objective 3
- Conduct bibliometric analyses of co-author networks and common terminology use across and within disciplines.
The following visualisations allow us to view bibliometric patterns from data exported directly from scopus. We are able to view common keyword use, author networks, affiliations, and citation patterns.
Keyword matrix plot
NetMatrix_keywords <- biblioNetwork(bib, analysis = "co-occurrences", network = "keywords", sep = ";")
NetMatrix_keywords_plot <- networkPlot(NetMatrix_keywords, normalize = "association", n = 10, Title = "Keyword co-occurrences", type = "fruchterman",
size.cex = TRUE, size = 30, remove.multiple = F, edgesize = 10, labelsize = 3, label.cex = TRUE, edges.min = 2, cluster = "edge_betweenness")

Thematic map
map_thematic <- thematicMap(bib, field = "ID", n = 1000, minfreq = 5, stemming = FALSE, size = 0.5, n.labels = 1, repel = TRUE)
plot(map_thematic$map)

Author collaboration network
NetMatrix_authors <- biblioNetwork(bib, analysis = "collaboration", network = "authors", sep = ";")
NetMatrix_authors_plot <- networkPlot(NetMatrix_authors, n = 20, Title = "Author collaboration", type = "auto", size = 15, size.cex = TRUE,
edgesize = 10, labelsize = 1)

Country collaboration network
bib_sco2 <- metaTagExtraction(bib, Field = "AU_CO", sep = ";") #we need to extract countries from the affiliations first
# bib_sco2$AU_CO[1:10]
NetMatrix_country <- biblioNetwork(bib_sco2, analysis = "collaboration", network = "countries", sep = ";")
NetMatrix_country_plot <- networkPlot(NetMatrix_country, n = 50, Title = "Country collaboration", type = "auto", size = TRUE, remove.multiple = FALSE,
labelsize = 1.5)

Addressing objective 4
- Conduct a critical appraisal of the SR literature to assess the rigour, transparency, and risk of bias.
The following visualisations allow us to see the average scores across each CEESAT questions as well as how each individual SR scored for each CEESAT questions. This will allow us to assess the quality and Risk of Bias of the SR literature.
Note: CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.
CEESAT score summary across SRs
This plot shows the average CEESAT score per question. CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.
# calculating the % of scores within each questions
count_ceesat_score <- ceesat_long %>%
count(score, by = question)
percent_ceesat_score <- count_ceesat_score %>%
mutate(percent = (n/sum(n)) * 100)
percent_ceesat_score <- percent_ceesat_score %>%
rename(question = by)
percent_ceesat_score$question <- as.factor(percent_ceesat_score$question)
percent_ceesat_score$question <- factor(percent_ceesat_score$question, levels(percent_ceesat_score$question)[length(percent_ceesat_score$question):1]) #reverse the order of questions
percent_ceesat_score$score <- as.factor(percent_ceesat_score$score)
percent_ceesat_score$score <- factor(percent_ceesat_score$score, levels(percent_ceesat_score$score)[c(2, 3, 1, 4)]) #set the order of levels for assessment scores:
summaryplot <- ggplot(data = percent_ceesat_score, x = question, y = percent) + geom_col(mapping = aes(x = question, y = percent, fill = score),
width = 0.7, position = "fill", color = "black") + coord_flip(ylim = c(0, 1)) + guides(fill = guide_legend(reverse = TRUE)) + scale_fill_manual(values = c("yellow",
"green", "orange", "red")) + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank()) + ylab("Percent") + xlab("CEESAT question") + guides(fill = guide_legend(title = "Score:"))
summaryplot

Individual CEESAT Scores
This plot shows the CEESAT score per SR. CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.
scoresplot <- ggplot(data = ceesat_long, aes(y = id, x = question)) + geom_tile(color = "black", fill = "white", size = 0.8) + geom_point(aes(color = as.factor(score)),
size = 5) + scale_x_discrete(position = "top") + guides(color = guide_legend(reverse = TRUE)) + scale_color_manual(values = c("orange",
"yellow", "green", "red"), name = "Score:") + theme_minimal() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), legend.position = "bottom", legend.background = element_rect(linetype = "solid", colour = "grey"),
legend.key.size = unit(0.75, "cm"), legend.text = element_text(size = 12), axis.text.y = element_text(size = 10, color = "black"),
axis.text.x = element_text(angle = 45, hjust = 0), plot.margin = unit(c(1, 1, 1, 0), "cm")) + ylab("Study ID") + xlab("CEESAT question")
scoresplot

---
title: "Nongenetic inhertance map of systematic reviews"
author: "Erin Macartney, Szymek Drobniak, Shinichi Nakagawa, Malgorzata Lagisz"
output: 
    
    rmdformats::readthedown:
      code_folding: hide
      code_download: true
      toc_depth: 4
editor_options: 
  chunk_output_type: console
---
    
```{r, include = FALSE}
knitr::opts_chunk$set(
message = FALSE,
warning = FALSE,
cache = TRUE, 
tidy = TRUE, 
echo = TRUE
)

rm(list = ls())
```


```{r setup, results = 'hide'}

knitr::opts_chunk$set(echo = TRUE)

library(readxl)
library(plyr)
library(tibble)
library(dplyr)
library(tidyverse)
library(stringr)
library(knitr)
library(forcats)
library(ggplot2)
library(hrbrthemes) #for ggplot2
library(bibliometrix)
library(igraph)

```
### Data loading

``` {r, results = 'hide'}
#manually extracted data
xldata <- "./Data_extraction_postcrosschecking.xlsx"

#bibliometric data
bib <- convert2df("./bibliometric.bib", dbsource = "scopus", format = "bibtex")
```


### Data organisation
Splitting list of tabs into separate dataframes

``` {r, results = 'hide'}

excel_sheets(path = xldata)
tab_names <- excel_sheets(path = xldata)


#creating a list of dataframes per tab
list_all <- lapply(tab_names, function(x) read_excel(path = xldata, sheet = x))

#assigning tab names to each dataframe
names(list_all) <- tab_names

#get dataframes out of list
list2env(list_all, .GlobalEnv)
```

## Addressing objectives 1 and 2 {.tabset}

1)	Map the SR literature across disciplines e.g., the types of environmental exposures and traits synthesised, the proportion of SRs that examine inter- versus trans-generational effects, and which disciplines dominate the non-genetic inheritance SR literature. The map will highlight gaps in the literature that remain to be synthesised or have very few SRs.

2)	Present discipline-specific research patterns by summarising commonalities and disparities between disciplines (e.g., do SRs of specific environmental exposures dominate one discipline and not others? Do some disciplines focus on inter-generational effects, and another on trans-generational effects?).

### Percent of SR's within disciplines

``` {r}

count_discipline <-Review_info %>% count(discipline_code) %>% arrange(desc(n)) 
percent_discipline <- count_discipline %>% mutate(percent = (n/sum(n))*100)
percent_discipline$discipline_code <- factor(percent_discipline$discipline_code, level = percent_discipline$discipline_code[order(percent_discipline$n, decreasing = FALSE)])

ggplot(percent_discipline, aes(x = discipline_code, y = percent)) + 
  geom_col(aes(fill = ""), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  xlab("Discipline") + 
  scale_fill_manual(values = c("#919191")) +
  theme(legend.position = "none", axis.title.x = element_text(size = 10))

```

### Inter- vs trans-generational effects within and between disciplines

```{r}

Merged_inter_vs_trans <- merge(Inter_vs_trans_info, Review_info)

count_inter_vs_trans <- Merged_inter_vs_trans %>% count(inter_vs_trans_code, by = discipline_code ) %>% arrange(desc(n))
percent_inter_vs_trans <- count_inter_vs_trans %>% mutate(percent = (n/sum(n))*100)

percent_inter_vs_trans <-percent_inter_vs_trans %>%
  rename(
    discipline_code = by
  )

ggplot(percent_inter_vs_trans, aes(x = inter_vs_trans_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Inter_vs_trans_info_unique <- Inter_vs_trans_code_info %>% 
  select(
  controlled_vocab_inter_vs_trans, description
  ) 
unique(Inter_vs_trans_info_unique)
```

### Descendant generations within and between disciplines

```{r}

merged_descendant_generat <- merge(Descendant_generat_info, Review_info)

count_descendant_generat <- merged_descendant_generat %>% count(descendant_generat_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_generat <- count_descendant_generat %>% mutate(percent = (n/sum(n))*100)

percent_descendant_generat <- percent_descendant_generat %>% 
  rename (
    discipline_code = by
  )

ggplot(percent_descendant_generat, aes(x = descendant_generat_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

### Terminology used within and between disciplines
I.e., does the use of inter- and trans-generational inheritance match our definitions (see Fig. 1)

``` {r}

count_terminology <- Review_info %>% count(terminology_code, by = discipline_code) %>% arrange(desc(n))
percent_terminology <- count_terminology %>% mutate(percent = (n/sum(n))*100)

percent_terminology<- percent_terminology %>%
  rename(
    discipline_code = by
  )

ggplot(percent_terminology, aes(x = terminology_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() +  
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
terminology_code_unique <- Terminology_code_info %>% 
  select(
    controlled_vocab_terminology, description_terminology
  )
unique(terminology_code_unique)
```

### Types of non-genetic transmission within and between disciplines
I.e., are the non-genetic effects conferred through the matriline or patriline etc.

```{r}

Merged_transmission_info <- merge(Transmission_info, Review_info)

count_transmission <- Merged_transmission_info %>%  count(transmission_code, by = discipline_code) %>% arrange(desc(n))
percent_transmission <- count_transmission %>% mutate(percent = (n/sum(n))*100)

percent_transmission <-percent_transmission %>%
  rename(
    discipline_code = by
  )

ggplot(percent_transmission, aes(x = transmission_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Transmission_info_unique <- Transmission_code_info %>% 
  select(
    controlled_vocab_transmission, description
  )
unique(Transmission_info_unique) 

```

### F0 environmental manipulations within and between disciplines

```{r}

merged_F0_env <- merge(F0_env_info, Review_info)

count_F0_env<- merged_F0_env %>% count(F0_env_code, by = discipline_code ) %>% arrange(desc(n))
percent_F0_env <- count_F0_env %>% mutate(percent = (n/sum(n))*100)
 
percent_F0_env <-percent_F0_env %>%
  rename(
    discipline_code = by
  )

ggplot(percent_F0_env, aes(x = F0_env_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
F0_env_info_unique <- F0_env_code_info %>% 
  select(
    controlled_vocab_F0_env, description
  )
unique(F0_env_info_unique)
```

### Environmental effect direction within and between disciplines
I.e., are the effects of environment predicted to have negative, positive, or neutral effects on offspring phenotype

```{r}

merged_env_eff <- merge(Env_eff_diection_info, Review_info)

count_env_eff <- merged_env_eff %>% count(env_eff_direction_code, by = discipline_code) %>% arrange(desc(n))
percent_env_eff <- count_env_eff %>% mutate(percent = (n/sum(n))*100)

percent_env_eff <-percent_env_eff %>%
  rename(
    discipline_code = by
  )

ggplot(percent_env_eff, aes(x = env_eff_direction_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Env_eff_direction_info_unique <- Env_eff_direction_code_info %>% 
  select(
    controlled_vocab_env_eff_direction, description
  )
unique(Env_eff_direction_info_unique) 
```

### F0 environmental exposure timing within and between disciplines
**Note:** We excluded SRs that solely focused on environmental exposures that occured when the F0 generation was a fetus (i.e., pre-natal). However,  some broader SRs (i.e., eco evo SRs) included primary studies where the F0 generation was exposed pre-natally. This was therefore coded in our data. 

```{r}

merged_exposure_timing <- merge(Exposure_timing_info, Review_info)

count_exposure_timing <- merged_exposure_timing %>% count(exposure_timing_code, by = discipline_code) %>% arrange(desc(n))
percent_exposure_timing <- count_exposure_timing %>% mutate(percent = (n/sum(n))*100)

percent_exposure_timing <-percent_exposure_timing %>%
  rename (
    discipline_code = by
  )

ggplot(percent_exposure_timing, aes(x = exposure_timing_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used, and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Exposure_timing_info_unique<- Exposure_timing_code_info %>% 
  select(
    controlled_vocab_exposure_timing, description
  )
unique(Exposure_timing_info_unique) 
```

### Descendant traits within and between disciplines 

```{r}

merged_descendant_trait <- merge(Descendant_trait_info, Review_info)

count_descendant_trait <- merged_descendant_trait %>% count(descendant_trait_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_trait <- count_descendant_trait %>% mutate(percent = (n/sum(n))*100)

percent_descendant_trait <- percent_descendant_trait %>%
  rename(
    discipline_code = by
  )

ggplot(percent_descendant_trait, aes(x = descendant_trait_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() + 
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Descendant_trait_info_unique<-Descendant_trait_code_info %>% 
  select(
     controlled_vocab_descendant_trait,description
  )
unique(Descendant_trait_info_unique) 
```

### Descendant sex within and between disciplines

```{r}

merged_descendant_sex <- merge(Descendant_sex_info, Review_info)

count_descendant_sex <- merged_descendant_sex %>% count(descendant_sex_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_sex <- count_descendant_sex  %>% mutate(percent = (n/sum(n))*100)

percent_descendant_sex <- percent_descendant_sex %>%
  rename(
    discipline_code = by
           )

ggplot(percent_descendant_sex, aes(x = descendant_sex_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Descendant_sex_info_unique<- Descendant_sex_code_info%>% 
  select(
    controlled_vocab_descendant_sex, description
  )
unique(Descendant_sex_info_unique)
```

### Descendant age within and between disciplines
**Note:** We excluded SRs that solely focused on fetal traits in descendants. However, some broader SRs included primary studies that mearured fetal traits. This was therefore coeded in our data. 

```{r}

merged_descendant_age <- merge(Descendant_age_info, Review_info)

count_descendant_age <- merged_descendant_age %>% count(descendant_age_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_age <- count_descendant_age %>% mutate(percent = (n/sum(n))*100)

percent_descendant_age <- percent_descendant_age %>%
  rename(
    discipline_code = by
  )

ggplot(percent_descendant_age, aes(x = descendant_age_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Descendant_age_info_unique<- Descendant_age_code_info %>% 
  select(
    controlled_vocab_descendant_age, description
  )
unique(Descendant_age_info_unique) 
```

### Higher taxononmic groups within and between disciplines 

``` {r}

Merged_higher_taxon <- merge(Higher_taxon_info, Review_info)

count_higher_taxon <- Merged_higher_taxon %>% count(taxon_code, by = discipline_code) %>% arrange(desc(n))
percent_higher_taxon <- count_higher_taxon %>% mutate(percent = (n/sum(n))*100)

percent_higher_taxon<-percent_higher_taxon %>%
  rename(
    discipline_code = by
  )

ggplot(percent_higher_taxon, aes(x = taxon_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Higher_taxon_info_unique<- Taxon_code_info %>% 
  select(
    controlled_vocab_taxon, description
  )
unique(Higher_taxon_info_unique)
```

### Descendent generation vs terminology use across disciplines
Does the terminology used (i.e., inter- vs trans-generational) match our definition (based on generation, sex and taxa) within the  descendant generations examined

``` {r}

generat_terminology <- merge(Descendant_generat_info, Review_info)
  
count_generat_terminology <- generat_terminology %>% count(descendant_generat_code, by = terminology_code) %>% arrange(desc(n))
percent_generat_terminology <- count_generat_terminology %>% mutate(percent = (n/sum(n))*100)

percent_generat_terminology<-percent_generat_terminology %>%
  rename(
    terminology_code = by
  )

ggplot(percent_generat_terminology, aes(x = descendant_generat_code, y = percent)) + 
  geom_col(aes(fill = terminology_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Terminology:"))
```

### Time trend of the number of SRs per year

``` {r}

Publication_info %>% count(year) %>% ggplot(aes(x = year, y = n)) + 
  geom_area(fill = '#919191', alpha = 1) +
  geom_line(color = 'skyblue', size = 1) + 
  geom_point(size=1, color = 'blue') +
  theme_minimal() +
  scale_x_continuous(name = "", limits = c(2005, 2020)) +
  scale_y_continuous(name = "Article count", limits = c(0, 10)) +
  ggtitle("Publication year") + 
  theme(plot.title = element_text(hjust = 0.5))

```

## Adressing objective 3 {.tabset}

3)	Conduct bibliometric analyses of co-author networks and common terminology use across and within disciplines. 

The following visualisations allow us to view bibliometric patterns from data exported directly from scopus. We are able to view common keyword use, author networks, affiliations, and citation patterns. 

### Author, country, and citation summary plots

``` {r, message = FALSE, warning = FALSE, results = 'hide'}
results1 <- biblioAnalysis(bib) #run basic standard descriptive analysis of the dataset (data frame)
summary(results1, k = 7, pause = F, width = 130) #produces a sequence of standard summary tables displayed in the console
plot(results1, k = 7, pause = F)
```

### Keyword matrix plot

``` {r, message = FALSE, warning = FALSE}
NetMatrix_keywords <- biblioNetwork(bib, analysis = "co-occurrences", network = "keywords", sep = ";")
NetMatrix_keywords_plot <- networkPlot(NetMatrix_keywords, normalize="association", n = 10, Title = "Keyword co-occurrences", type = "fruchterman", size.cex = TRUE, size = 30, remove.multiple = F, edgesize = 10, labelsize = 3, label.cex = TRUE,edges.min = 2, cluster = "edge_betweenness")
```

### Thematic map

``` {r, message = FALSE, warning = FALSE}
map_thematic <- thematicMap(bib, field = "ID", n = 1000, minfreq = 5, stemming = FALSE, size = 0.5, n.labels = 1, repel = TRUE)
plot(map_thematic$map)
```

### Author collaboration network

``` {r, message = FALSE, warning = FALSE}
NetMatrix_authors <- biblioNetwork(bib, analysis = "collaboration",  network = "authors", sep = ";")
NetMatrix_authors_plot <- networkPlot(NetMatrix_authors,  n = 20, Title = "Author collaboration", type = "auto", size = 15, size.cex = TRUE, edgesize = 10, labelsize = 1) 
```

### Country collaboration network

```{r, message = FALSE, warning = FALSE}
bib_sco2 <- metaTagExtraction(bib, Field = "AU_CO", sep = ";") #we need to extract countries from the affiliations first
#bib_sco2$AU_CO[1:10]
NetMatrix_country <- biblioNetwork(bib_sco2, analysis = "collaboration", network = "countries", sep = ";")
NetMatrix_country_plot <-  networkPlot(NetMatrix_country, n = 50, Title = "Country collaboration", type = "auto", size=TRUE, remove.multiple=FALSE, labelsize=1.5)
```

## Addressing objective 4

4)	Conduct a critical appraisal of the SR literature to assess the rigour, transparency, and risk of bias. 

The following visualisations allow us to see the average scores across each CEESAT questions as well as how each individual SR scored for each CEESAT questions. This will allow us to assess the quality and Risk of Bias of the SR literature. 

**Note:** CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

### Blinding paper ID and wrangling data into long format

Paper ID was blinded for the pilot but will not be blinded for the full study

```{r}
#blinding authors
Assessment$id <- paste("ID", c(1:length(Assessment$id)), sep = "")
#shortening column names
names(Assessment) <- gsub("CEESAT_", "", names(Assessment), fixed = TRUE)
#selecting only the columns with scores
#selecting only the columns with scores
Assessment_reduced <- select(Assessment, c("id", !ends_with("_comment")))
#wrangling data into long format
ceesat_long <- gather(Assessment_reduced, question, score, Q1.1:Q8.1, factor_key=TRUE)
```

### CEESAT score summary across SRs
This plot shows the average CEESAT score per question. 
CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

``` {r}
#calculating the % of scores within each questions 
count_ceesat_score <- ceesat_long %>% count(score, by = question) 
percent_ceesat_score <- count_ceesat_score %>% mutate(percent = (n/sum(n))*100)
percent_ceesat_score <- percent_ceesat_score %>%
  rename(
    question = by
  )
percent_ceesat_score$question <- as.factor(percent_ceesat_score$question)
percent_ceesat_score$question <- factor(percent_ceesat_score$question, levels(percent_ceesat_score$question)[length(percent_ceesat_score$question):1]) #reverse the order of questions
percent_ceesat_score$score <- as.factor(percent_ceesat_score$score)
percent_ceesat_score$score <- factor(percent_ceesat_score$score, levels(percent_ceesat_score$score)[c(2,3,1,4)]) #set the order of levels for assessment scores:
summaryplot <- ggplot(data = percent_ceesat_score, x = question, y = percent) +
  geom_col(mapping = aes(x = question, y = percent, fill = score), width = 0.7,
           position = "fill", color = "black") +
  coord_flip(ylim = c(0, 1)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_manual(values = c("yellow","green", "orange","red")) +
  theme(legend.position = "bottom", panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.background = element_blank()) + 
  ylab("Percent") + xlab("CEESAT question") +
  guides(fill=guide_legend(title="Score:")) 
summaryplot
```

### Individual CEESAT Scores 
This plot shows the CEESAT score per SR.
CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

``` {r}
scoresplot <- ggplot(data = ceesat_long, aes(y = id, x = question)) +
  geom_tile(color="black", fill="white", size = 0.8) +
  geom_point(aes(color = as.factor(score)), size = 5) +
  scale_x_discrete(position = "top") +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_manual(values = c("orange","yellow","green","red" ), name = "Score:") + 
  theme_minimal() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "bottom",
        legend.background = element_rect(linetype = "solid", colour = "grey"),
        legend.key.size = unit(0.75, "cm"),
        legend.text = element_text(size = 12),
        axis.text.y = element_text(size = 10, color = "black"),
        axis.text.x = element_text(angle = 45, hjust=0),
        plot.margin = unit(c(1,1,1,0), "cm")
  ) +
  ylab("Study ID") + 
  xlab("CEESAT question") 
scoresplot
```